/* Event terminology:
--------------------------------------------|
   Terminology				| Start	|  End	|
----------------------------|-------|-------|  	
1: Car purchase in Norway	|	X	|		|
2: Used car import			|	X	|		|
3: De-registration			|	X	|	X	|
4: Re-registration			|	X	|	X	|
5: EU control				|	X	|	X	|
6: Export					|		|	X	|
7: Scrap					|		|	X	|
8: Keep						|		|	X	|
--------------------------------------------|

The dataset will be split into time periods for each car
"Start" denotes events that can mark the start of a time period
"End" denotes events that can mark the end of a time period

- The age of all cars will be t - freg_DMY
- The age of cars purchased in Norway can also be calculated as the time from
  the first event (new car purchase)  
  
*/

* ==============================================================================
/* First step: import dates for freg (1), reg (4), dereg (3) and scrap (7) */
* ==============================================================================
use "${newdata}lnr_regnr_year.dta", clear
bys carid (year): keep if _n == 1 /* Keeping the first observation for each car only*/
* Dates for first registration =================================================
keep carid ownerid /// ID VARS
	freg_DMY freg_date /// FIRST REGISTERED
	year_min year_max /// MIN AND MAX YEARS OBSERVED IN DATASET
	fuel sp_skilt rutine kj_art kjt_grup typekode stjaalet utfoert bruktimp /// ATTRIBUTES
	
gen event_DMY = freg_DMY
gen event_type = 1	
duplicates drop
save "${newdata}timeline", replace

* Dates for (re)registration ===================================================
use "${newdata}lnr_regnr_year.dta", clear
keep carid ownerid /// ID VARS
	freg_DMY freg_date /// FIRST REGISTERED
	year_min year_max /// MIN AND MAX YEARS OBSERVED IN DATASET
	fuel sp_skilt rutine kj_art kjt_grup typekode stjaalet utfoert bruktimp /// ATTRIBUTES
	reg_DMY
	
rename reg_DMY event_DMY
gen event_type = 4
duplicates drop
append using "${newdata}timeline"
compress
save "${newdata}timeline", replace

* Dates for de-registration ====================================================
use "${newdata}lnr_regnr_year.dta", clear
keep carid ownerid /// ID VARS
	freg_DMY freg_date /// FIRST REGISTERED
	year_min year_max /// MIN AND MAX YEARS OBSERVED IN DATASET
	fuel sp_skilt rutine kj_art kjt_grup typekode stjaalet utfoert bruktimp /// ATTRIBUTES
	avreg_DMY
	
rename avreg_DMY event_DMY
drop if event_DMY == .
gen event_type = 3	
duplicates drop

compress
append using "${newdata}timeline"
save "${newdata}timeline", replace

* Dates for scrapping ==========================================================
use "${newdata}lnr_regnr_year.dta", clear
keep carid ownerid /// ID VARS
	freg_DMY freg_date /// FIRST REGISTERED
	year_min year_max /// MIN AND MAX YEARS OBSERVED IN DATASET
	fuel sp_skilt rutine kj_art kjt_grup typekode stjaalet utfoert bruktimp /// ATTRIBUTES
	scrap_DMY
	
rename scrap_DMY event_DMY

drop if event_DMY == .
sort carid event_DMY
bys carid: keep if _n == _N /* Only keeping the last scrapping date per car */
gen event_type = 7	
duplicates drop

compress
append using "${newdata}timeline"
save "${newdata}timeline", replace

* EU controls ==================================================================

import sas using "${rawdata}kjl2017", clear
gen year = 2017 // Storing the "dataset year"

* Looping over remaining years, appending datasets and removing duplicates =====
forvalues i = 2016(-1)2005 {
	preserve 
		import sas using "${rawdata}kjl`i'" , clear					
		gen year = `i'
		tempfile to_append 
		save `to_append', replace
	restore 
	append using `to_append'
}
compress

* Only keeping the actual odometer registrations and dates
rename w17_1455_regnr carid
drop if ktrldato == "" & ( km == . | km == 0) 

* Saving the registration date as an event
gen event_DMY = date(ktrldato, "DM20Y")
format event_DMY %td
drop ktrldato
sort carid event_DMY year

keep carid event_DMY km retting year

bys carid event_DMY: egen year_max = max(year)
drop if year != year_max 
gen event_type = 5 /* event_type = 5 means an odometer reading */
replace retting = 0 if retting == .
keep carid event_DMY event_type km retting

* =========== Appending together with other event types ========================

compress
append using "${newdata}timeline"
gen ind = (event_type != 5)
bys carid: egen aux = total(ind)
drop if aux == 0 /* dropping cars that only appear in EU controls, 
					not the motor vehicle register */
drop ind aux
save "${newdata}timeline", replace

* =============

sort carid event_DMY event_type

* Filling in missing values for EU control events
foreach x of varlist ///
	ownerid freg_DMY year_min year_max fuel sp_skilt ///
	rutine kj_art kjt_grup typekode stjaalet utfoert bruktimp freg_date {
		bys carid: replace `x' = `x'[_n-1] if event_type == 5
}
order event_DMY event_type, a(freg_DMY)
order km retting, a(event_type)
save "${newdata}timeline", replace
* ==============================================================

use "${newdata}timeline", clear
bys carid (event_DMY event_type): gen event_nr = _n
order event_nr, a(event_type)

* Fixing start event ===========================================
/* Replacing reg and freg in case freg comes after reg*/
gen aux = (event_nr == 2 & event_type == 1)
replace aux = 2 if aux[_n+1] == 1

* Replace event types in cases where re-registration happens before first registration,
* such that the re-register is the first registration
replace event_type = 4 if aux == 1
replace event_type = 1 if aux == 2
drop aux
 
* ==============================================================================
/*
The car below does not make sense:
- typekode changes from 16 to 36
- EU controls are prior to first registration date
*/
drop if carid == "R01541266"
* ==============================================================================

* Doing the same where event_type == 1 for event_nr == 3
gen aux = (event_nr == 3 & event_type == 1)
bys carid (event_DMY event_type): replace aux = 2 if aux[_n+2] == 1 & event_type == 4
replace event_type = 4 if aux == 1 & event_type == 1
replace event_type = 1 if aux == 2 & event_type == 4
drop aux

* === ADDING USED CAR IMPORT AS AN EVENT =======================================
/* Rule: All cars with re-registration as second event (after first registration)
   that are marked as used car imports, will get the re-registration re-defined
   as event type = 2, i.e. event type is used car import */
gen aux = (bruktimp == 1 & event_type == 4 & event_nr == 2)
replace event_type = 2 if aux == 1
* For these cars, we drop the "new purchase" as event
bys carid: drop if event_type == 1 & _n == 1 & event_type[_n+1] == 2
/* Note: Other cars are also imported used (bruktimp == 1). However, these are more likely to have been imported prior to 2005 */
drop aux
* === Updating event number ===
sort carid event_DMY event_type
bys carid: gen aux = _n
replace event_nr = aux
drop aux

* ==============================================================================
bys carid: gen event_nr_bw = _N - _n
gen aux = (event_type == 7 & event_nr_bw != 0)
bys carid: egen ind = total(aux)
br if ind == 1
* Rule: If last event is deregister and second last event is scrap, drop the last event
sort carid event_DMY event_type
bys carid (event_DMY event_type): drop if event_type == 3 & event_type[_n-1] == 7 & event_nr_bw == 0
drop event_nr_bw
bys carid (event_DMY event_type): gen event_nr_bw = _N - _n

gen ind2 = (ind == 1 & event_type == 7)
bys carid: gen ind3 = sum(ind2)
/* Rule: dropping all events taking place after scrapping. */
drop if ind3 == 1 & event_type != 7 
drop ind* event_nr_bw

sort carid event_DMY event_type
bys carid (event_DMY event_type): gen event_nr_bw = _N - _n

* === INDICATOR FOR CARS THAT ARE EXPORTED =====================================
* RULE: cars with utfoert = 1 and the last event as a de-register, are defined as exported
replace event_type = 6 if (event_nr_bw == 0 & event_type == 3 & utfoert == 1)	 

* === Generating end event =====================================================
gen event_type_end = .
order event_type_end, a(event_type)
sort carid event_DMY event_type
bys carid (event_DMY event_type): replace event_type_end = event_type[_n + 1]

* Adding end event time
bys carid (event_DMY event_type): gen event_end_DMY = event_DMY[_n + 1]
format event_end_DMY %td
order event_end_DMY, a(event_DMY)

* No events should start with exporting or scrapping - these are now defined as end events
drop if event_type_end == . & (event_type == 6 | event_type == 7) 

* We also remove the last events that are starting with de-registration
drop if event_type_end == . & event_type == 3 /* 718,175 observations deleted */

/* Adding the last end event - cars that are neither exported or scrapped,
   or deregistered as the final event, are kept throughout the period */
replace event_type_end = 8 if event_type_end == .
replace event_end_DMY = mdy(12,31,2017) if event_end_DMY == .   

* === Labelling the data =======================================================

* Labels for events
label define events 1 "New" 2 "Import" 3 "De-register" 4 "Re-register" 5 "EU control" 6 "Export" 7 "Scrap" 8 "Keep", replace

label values event_type events
label values event_type_end events

* Labels for fuel types
label define fuel 0 "Unknown" 1 "Gasoline" 2 "Diesel" 3 "Kerosene" 4 "Gas" 5 "BEV" 6 "Hydrogen" 7 "Gasoline hybrid" 8 "Diesel Hybrid" 9 "Other", replace
label values fuel fuel
* Some fuel types are missing - fill them in
drop aux
sort carid event_DMY event_type
bys carid (event_DMY event_type): egen aux = mean(fuel)
replace fuel = aux if fuel == .
drop aux

* === Adding owner type ========================================================
gen ownertype = (substr(ownerid,1,1) == "P")
label define ownertype 0 "Firm" 1 "Individual"
label values ownertype ownertype
* === Counting the EU controls =================================================
sort carid event_DMY event_type
gen eu_ctrl_nr = 0
replace eu_ctrl_nr = 1 if event_type == 5
bys carid (event_DMY event_end_DMY event_type): replace eu_ctrl_nr = sum(eu_ctrl_nr)
replace eu_ctrl_nr = . if event_type != 5

* === Days of each event ===
gen event_days = event_end_DMY - event_DMY
sort carid event_DMY event_type
bys carid (event_DMY event_end_DMY event_type): gen days_cum = sum(event_days)

* === Days each car is in use each event ===
gen event_days_use = event_days
* Setting this to zero if the event is de-registation
replace event_days_use = 0 if event_type == 3
sort carid event_DMY event_type
bys carid (event_DMY event_end_DMY event_type): gen days_use_cum = sum(event_days_use)

/* The variable generated below will be used to calculate kms driven per day 
between each EU control */
gen days_use_prior = event_days_use // days used for each event
replace days_use_prior = days_use_prior + ( event_DMY - freg_DMY ) ///
	if event_type == 2 // Adding days from freg to import date to used imported cars
bys carid (event_DMY event_end_DMY event_type): ///
	replace days_use_prior = sum(days_use_prior) // Creating the cumulative sum
replace days_use_prior = days_use_prior - event_days_use /* subtracting event days
to make sure the variable measures days prior to the START of the event, and not the end */
	
* === Calculating average driving per day the car is operational ===============
/* This will be calculated per registration as the change in kms divided by
the number of days the car is operational */
gen kmperday = . /* km per day - raw value, no imputations or corrections */

order eu_ctrl_nr event_days_use days_use_prior kmperday, a(km)

* Calculating km per day for the first odometer reading
replace kmperday = km / days_use_prior if eu_ctrl_nr == 1
* Calculating km per day for the remaining odometer readings
bys carid (eu_ctrl_nr): replace kmperday = (km - km[_n-1]) ///
	/(days_use_prior - days_use_prior[_n-1]) ///
	if eu_ctrl_nr > 1 & eu_ctrl_nr != .

/* Now we have km per day for each EU control, next step is to distribute it to 
the appropriate events */
gen kmperday_eu = kmperday
replace kmperday = .
order kmperday_eu, a(kmperday)
sort carid event_DMY event_end_DMY event_type

/* Filling in values such that the event that ENDS with the EU control gets
   the appropriate kmperday */
bys carid (event_DMY event_end_DMY event_type): ///
	replace kmperday = kmperday_eu[_n+1]

/* Filling in the remaining values */
forvalues i = 1/17 { /* 17 times ensures that the last replace command has zero changes */
	bys carid (event_DMY event_end_DMY event_type): ///
		replace kmperday = kmperday[_n+1] if kmperday == .
	di `i'
}

gen kmperevent = kmperday * event_days_use

/* Dataset is now ready to be transformed from carid-ownerid-event
   to carid-ownerid-event-year */

save "${newdata}car_owner_event", replace
* ==============================================================================

